#!/usr/bin/env python
# -*- coding: utf-8 -*-
from nltk.corpus import stopwords
import sys
import json

def get_text(obj):
    """Merge a record's text with its quoted/retweeted text and total its weight.

    ``obj`` is a mapping that provides ``text``, ``status`` and ``retweet``.
    The value of ``status`` decides which additional entries are folded in:

    * 1 -- ``quoted_quoted_text`` and ``quoted_retweet``
    * 2 -- ``retweeted_text`` and ``retweeted_retweet``
    * 3 -- both pairs, retweeted first, then quoted
    * any other value -- nothing extra

    Each extra text is appended after a single space, and each extra count
    is added to ``1 + obj['retweet']``.

    Returns a two-item list ``[text, num]``.
    """
    body = obj['text']
    code = obj['status']
    weight = 1 + obj['retweet']

    # (status value, text keys to append in order, count keys to add up)
    layout = (
        (1, ('quoted_quoted_text',), ('quoted_retweet',)),
        (2, ('retweeted_text',), ('retweeted_retweet',)),
        (3, ('retweeted_text', 'quoted_quoted_text'),
            ('retweeted_retweet', 'quoted_retweet')),
    )

    for wanted, text_keys, count_keys in layout:
        if wanted == code:
            body += ''.join(' ' + obj[key] for key in text_keys)
            weight += sum(obj[key] for key in count_keys)
            break

    return [body, weight]

def main():
    """Read JSON records from stdin and print ``word<TAB>num`` lines.

    Each non-blank line of standard input is parsed as one JSON object and
    handed to ``get_text``, which yields the combined text and a count.
    The text is split on whitespace; every token is lower-cased and, unless
    it is an English stop word or one of a few fixed noise tokens
    (``http``, ``co``, ``rt``, ``https``, ``via``, ``amp``), written to
    standard output as ``token<TAB>num``.

    Raises:
        ValueError: if a non-blank line is not valid JSON.
        KeyError: if a record lacks a field ``get_text`` needs.
    """
    # Build the exclusion set once: it does not change between lines, and a
    # set gives constant-time membership tests instead of scanning two lists.
    ignored = set(stopwords.words('english'))
    ignored.update(['http', 'co', 'rt', 'https', 'via', 'amp'])

    for line in sys.stdin:
        # A blank line (e.g. a trailing newline) carries no record; skip it
        # rather than letting json.loads fail on empty input.
        if not line.strip():
            continue
        text, num = get_text(json.loads(line))
        for word in text.split():
            token = word.lower()
            if token not in ignored:
                # Single-argument parenthesised form prints identically on
                # Python 2 (print statement) and Python 3 (print function).
                print('%s\t%s' % (token, num))

# Call main() only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()
    
